p_year %>%
inner_join(poems,by=c("p_id")) %>%
count(collection,year) %>%
mutate(measure="yearly count") %>%
union_all(
p_year %>% # 10 year rolling mean
distinct(year) %>%
left_join(p_year %>% distinct(year),sql_on="RHS.year BETWEEN LHS.year-5 AND LHS.year+4") %>%
inner_join(p_year,by=c("year.y"="year")) %>%
inner_join(poems,by=c("p_id")) %>%
group_by(collection=collection,year=year.x) %>%
summarize(n=n()/n_distinct(year.y),.groups="drop") %>%
mutate(measure="10 year rolling mean")
) %>%
filter(collection!="literary",!year %in% c(0,9999)) %>%
mutate(year=if_else(year>=1800,year,1780)) %>%
group_by(collection,measure,year) %>%
summarise(n=sum(n),.groups="drop") %>%
collect() %>%
complete(year,collection,measure,fill=list(n=0)) %>%
mutate(collection=fct_relevel(str_to_upper(collection),"ERAB","SKVR","JR")) %>%
group_by(collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>4600,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=collection)) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=5000) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=5000, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,4600),xlim=c(1800,1970),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=1000),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")
# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
top_top_themes <- poem_theme %>%
inner_join(poems) %>%
inner_join(themes_to_top_level_themes) %>%
count(collection, ancestor_t_id) %>%
group_by(collection) %>%
slice_max(n,n=9) %>%
ungroup() %>%
mutate(top_theme=TRUE) %>%
select(ancestor_t_id,top_theme) %>%
compute_a(temporary=TRUE, overwrite=TRUE)
d <- p_year %>%
inner_join(poems,by=c("p_id")) %>%
inner_join(poem_theme %>%
inner_join(themes_to_top_level_themes %>%
inner_join(themes %>%
filter(!str_detect(theme_id,"^erab_orig")) %>%
select(ancestor_t_id=t_id,ancestor_theme_name=name)))) %>%
left_join(top_top_themes) %>%
mutate(
ancestor_theme_name=if_else(!is.na(top_theme),ancestor_theme_name,"Muut"),
ancestor_t_id=if_else(!is.na(top_theme),ancestor_t_id,-1),
) %>%
replace_na(list(ancestor_theme_name="Tuntematon", ancestor_t_id=-2)) %>%
distinct(ancestor_t_id,ancestor_theme_name, collection, year, p_id) %>%
count(ancestor_t_id,ancestor_theme_name, collection, year) %>%
mutate(measure="yearly count") %>%
union_all(
p_year %>% # 10 year rolling mean
distinct(year) %>%
left_join(p_year %>% distinct(year),sql_on="RHS.year BETWEEN LHS.year-5 AND LHS.year+4") %>%
inner_join(p_year,by=c("year.y"="year")) %>%
inner_join(poems,by=c("p_id")) %>%
inner_join(poem_theme %>%
inner_join(themes_to_top_level_themes %>%
inner_join(themes %>%
filter(!str_detect(theme_id,"^erab_orig")) %>%
select(ancestor_t_id=t_id,ancestor_theme_name=name)))) %>%
left_join(top_top_themes) %>%
mutate(
ancestor_theme_name=if_else(!is.na(top_theme),ancestor_theme_name,"Muut"),
ancestor_t_id=if_else(!is.na(top_theme),ancestor_t_id,-1),
) %>%
replace_na(list(ancestor_theme_name="Tuntematon", ancestor_t_id=-2)) %>%
distinct(ancestor_t_id,ancestor_theme_name, collection, year.x, year.y, p_id) %>%
group_by(ancestor_t_id,ancestor_theme_name, collection, year=year.x) %>%
summarize(n=n()/n_distinct(year.y),.groups="drop") %>%
mutate(measure="10 year rolling mean")
) %>%
filter(collection!="literary",!year %in% c(0L,9999L)) %>%
mutate(year=if_else(year>=1800L,year,1780L)) %>%
group_by(ancestor_theme_name, collection, measure, year) %>%
summarise(n=sum(n),.groups="drop") %>%
collect()
Warning: Missing values are always removed in SQL aggregation functions.
Use `na.rm = TRUE` to silence this warning
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB","JR")) %>%
filter(collection=="SKVR") %>%
complete(ancestor_theme_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_theme_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>1300,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=ancestor_theme_name)) +
# facet_wrap(~collection) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE,xoutlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=1400) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=1400, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,1300),xlim=c(1800,1940),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=500),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `collection = fct_relevel(str_to_upper(collection),
"SKVR", "ERAB", "JR")`.
Caused by warning:
! 1 unknown level in `f`: JR
# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB")) %>%
filter(collection=="ERAB") %>%
complete(ancestor_theme_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_theme_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>820,xoutlier=year<1800) %>%
mutate(ancestor_theme_name=case_match(ancestor_theme_name,
"Laulud noorrahva elust" ~ "Laulut nuorison elämästä (Laulud noorrahva elust)",
"Muut" ~ "Muut (sisältää 17 luokkaa)",
"Laulud meelelahutamiseks" ~ "Viihdytyslaulut (Laulud meelelahutamiseks)",
"Lüroeepilised laulud" ~ "Lyroeeppiset laulut (Lüroeepilised laulud)",
"Laulud laulust" ~ "Laulut laulusta (Laulud laulust)",
"Töölaulud" ~ "Työlaulut (Töölaulud)",
"Looduslaulud" ~ "Laulut luonnosta (Looduslaulud)",
"Laulud ühiskondlikest vahekordadest" ~ "Laulut yhteiskunnallisista suhteista\n(Laulud ühiskondlikest vahekordadest)",
"Murelaulud" ~ "Huolilaulut (Murelaulud)",
"Laulud abielust" ~ "Laulut avioelämästä (Laulud abielust)",
"Kalendrilaulud" ~ "Kalendaarilaulut (Kalendrilaulud)"
)) %>%
mutate(ancestor_theme_name=fct_reorder(ancestor_theme_name,n,.fun=sum,.desc=TRUE)) %>%
mutate(ancestor_theme_name=fct_relevel(ancestor_theme_name, "Muut (sisältää 17 luokkaa)", after=Inf)) %>%
ggplot(aes(x=year,y=n,color=ancestor_theme_name)) +
# facet_wrap(~collection) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE,xoutlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=900) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=900, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.box.just = "top",
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,820),xlim=c(1820,1950),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=200),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
# guides(color=guide_legend(nrow=2)) +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")
# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB","JR")) %>%
filter(collection=="JR") %>%
complete(ancestor_theme_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_theme_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>6500,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=ancestor_theme_name)) +
# facet_wrap(~collection) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=7200) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=7200, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,6500),xlim=c(1800,1960),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=1000),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")
# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
p_year %>%
filter(year %in% c(0,9999)) %>%
left_join(poems) %>%
count(collection,year) %>%
ungroup() %>%
gt() %>%
tab_header(title=\Abnormal years\) %>%
fmt_integer(n)
poems %>%
distinct(collection) %>%
pull() %>%
map(~p_col %>%
inner_join(poems %>% filter(collection==.x),by=c(\p_id\)) %>%
count(col_id) %>%
left_join(collectors,by=c(\col_id\)) %>%
select(col_id,name,n) %>%
collect() %>%
mutate(col_id=fct_reorder(str_c(col_id,\|\,name),n)) %>%
mutate(col_id=fct_lump_n(col_id,n=100,w=n)) %>%
mutate(col_id=fct_relevel(col_id,\Other\)) %>%
group_by(col_id) %>%
tally(wt=n) %>% {
ggplot(.,aes(x=col_id,y=n)) +
geom_col() +
geom_text(aes(label=p(n)),hjust='left',nudge_y = 100) +
theme_hsci_discrete(base_family=\Arial\) +
coord_flip() +
labs(title=str_c(\Collectors in \,.x))
}
)
[[1]]
[[2]]
[[3]]
[[4]]
p_col %>%
anti_join(collectors) %>%
count(col_id) %>%
gt() %>%
tab_header(title=\Collectors without a name\) %>%
fmt_integer(n)
p_col %>%
inner_join(collectors) %>%
inner_join(poems) %>%
filter(collection!="literary") %>%
mutate(collection=str_to_upper(collection)) %>%
count(collection,col_id,name) %>%
group_by(collection) %>%
slice_max(order_by=n,n=10) %>%
ungroup() %>%
select(-col_id) %>%
gt(groupname_col = "collection", rowname_col = "name") %>%
row_group_order(c("ERAB","SKVR","JR")) %>%
fmt_integer(n,sep_mark=" ")
| n | |
|---|---|
| ERAB | |
| Rosenstrauch, Karl Voldemar | 3 825 |
| Viljak, Karl | 3 225 |
| Ostrov, Mihkel | 2 300 |
| Vilberg (Vilbaste), Gustav | 2 218 |
| Viidalepp (Viidebaum), Richard | 2 134 |
| Kallas, Oskar Philipp | 1 723 |
| Seen, Gustav | 1 473 |
| Penna, Peeter | 1 312 |
| Tampere, Herbert | 1 188 |
| koguja teadmata | 1 175 |
| SKVR | |
| Krohn, Kaarle | 4 110 |
| Paulaharju, Samuli ja Jenni | 3 157 |
| Alava, Vihtori | 3 089 |
| Europaeus, D. E. D. | 2 899 |
| Neovius, A. D. | 2 535 |
| Porkka, Volmari | 2 424 |
| Lönnrot, Elias | 2 402 |
| Perä-Pohjolan ja Lapin Kotiseutuyhdistys | 2 172 |
| Salminen, Väinö | 1 761 |
| Vihervaara, Eemeli | 1 702 |
| JR | |
| Hämeenlinnan alakansakouluseminaari | 6 782 |
| Perä-Pohjolan ja Lapin Kotiseutuyhdistys | 3 463 |
| Kärki, Frans | 3 157 |
| Railonsala, Artturi | 2 452 |
| Paulaharju, Samuli ja Jenni | 2 168 |
| Sääski, Sylvi | 1 666 |
| Saarijärven yhteiskoulu | 1 652 |
| Pennanen, Olavi | 1 448 |
| Paavolainen, Oma Martti | 1 265 |
| Lönnrot, Elias | 1 239 |
p_col %>%
inner_join(collectors) %>%
inner_join(poems) %>%
filter(collection!="literary") %>%
mutate(collection=str_to_upper(collection)) %>%
count(collection,col_id,name) %>%
group_by(collection) %>%
slice_max(order_by=n,n=10) %>%
ungroup() %>%
inner_join(p_col) %>%
inner_join(poem_theme) %>%
inner_join(themes_to_top_level_themes) %>%
count(collection, col_id, t_id=ancestor_t_id) %>%
inner_join(themes %>% rename(theme_name=name)) %>%
inner_join(collectors) %>%
filter(collection=="SKVR") %>%
collect() %>%
# group_by(collection) %>%
# mutate(theme_name=fct_lump_n(theme_name, n, n=5, other_level="Muut")) %>%
# ungroup() %>%
group_by(col_id) %>%
mutate(tn=sum(n)) %>%
ungroup() %>%
mutate(name=fct_reorder(name,tn)) %>%
ggplot(aes(x=name,fill=theme_name,y=n)) +
# facet_wrap(~collection,scales="free",ncol=1) +
geom_col() +
theme_hsci_discrete() +
coord_flip() +
scale_y_continuous(labels=scales::number) +
labs(fill="Päätyyppi") +
xlab("Kerääjä") +
ylab("Tyyppimerkintöjä")
d <- p_loc %>%
count(loc_id) %>%
inner_join(locations) %>%
select(name,n) %>%
collect()
poems_without_location <- poems %>%
anti_join(p_loc) %>%
count() %>%
pull()
unprojected_locations <- d %>%
anti_join(polygons) %>%
add_row(name=NA,n=poems_without_location)
polygons %>%
left_join(d) %>%
tm_shape() +
tm_polygons(col='n', id='name', style='fisher', palette='plasma') +
tm_layout(title=str_c(\Geographical overview. Missing \,unprojected_locations %>% tally(wt=n) %>% pull() %>% p,\ poems.\))
tm_tiles #FFFFFF00 blank Esri.WorldGrayCanvas
OpenStreetMap
Esri.WorldTopoMap base n name tm_fill #666666 solid #0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#7E03A8
#7E03A8
#BFBFBF
#0D0887
#7E03A8
#7E03A8
#F89441
#F89441
#CC4678
#CC4678
#0D0887
#0D0887
#0D0887
#BFBFBF
#7E03A8
#0D0887
#BFBFBF
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#F89441
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#CC4678
#7E03A8
#0D0887
#0D0887
#7E03A8
#0D0887
#F89441
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#BFBFBF
#7E03A8
#F89441
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#7E03A8
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#BFBFBF
#7E03A8
#0D0887
#0D0887
#7E03A8
#7E03A8
#0D0887
#7E03A8
#0D0887
#7E03A8
#BFBFBF
#7E03A8
#0D0887
#0D0887
#7E03A8
#7E03A8
#7E03A8
#CC4678
#7E03A8
#CC4678
#7E03A8
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#7E03A8
#BFBFBF
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#CC4678
#7E03A8
#0D0887
#7E03A8
#0D0887
#F89441
#7E03A8
#F89441
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#BFBFBF
#CC4678
#CC4678
#7E03A8
#0D0887
#BFBFBF
#CC4678
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#CC4678
#BFBFBF
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#7E03A8
#CC4678
#0D0887
#BFBFBF
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#F89441
#BFBFBF
#7E03A8
#7E03A8
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#7E03A8
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#CC4678
#BFBFBF
#0D0887
#0D0887
#F0F921
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#BFBFBF
#0D0887
#7E03A8
#0D0887
#BFBFBF
#CC4678
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#F0F921
#0D0887
#0D0887
#CC4678
#BFBFBF
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#7E03A8
#BFBFBF
#0D0887
#7E03A8
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#BFBFBF
#7E03A8
#0D0887
#BFBFBF
#0D0887
#7E03A8
#0D0887
#7E03A8
#0D0887
#BFBFBF
#7E03A8
#0D0887
#7E03A8
#7E03A8
#BFBFBF
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#F89441
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#CC4678
#BFBFBF
#7E03A8
#7E03A8
#BFBFBF
#0D0887
#CC4678
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#CC4678
#0D0887
#7E03A8
#BFBFBF
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#7E03A8
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#0D0887
#CC4678
#7E03A8
#7E03A8
#0D0887
#BFBFBF
#CC4678
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#7E03A8
#BFBFBF
#7E03A8
#BFBFBF
#7E03A8
#0D0887
#7E03A8
#0D0887
#0D0887
#CC4678
#0D0887
#7E03A8
#0D0887
#CC4678
#BFBFBF
#CC4678
#CC4678
#BFBFBF
#BFBFBF
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#7E03A8
#F89441
#0D0887
#0D0887
#BFBFBF
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#0D0887
#7E03A8
#0D0887
#7E03A8
#0D0887
#7E03A8
#BFBFBF
#0D0887
#7E03A8
#0D0887
#7E03A8
#7E03A8
#7E03A8
#7E03A8
#CC4678
#CC4678
#0D0887
#7E03A8
#0D0887
#0D0887
#BFBFBF
#F89441
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#7E03A8
#BFBFBF
#0D0887
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#7E03A8
#0D0887
#CC4678
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#BFBFBF
#CC4678
#F89441
#7E03A8
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#F89441
#7E03A8
#0D0887
#CC4678
#0D0887
#CC4678
#CC4678
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#BFBFBF
#0D0887
#0D0887
#7E03A8
#0D0887
#7E03A8
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#CC4678
#7E03A8
#0D0887
#0D0887
#BFBFBF
#0D0887
#BFBFBF
#CC4678
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#BFBFBF
#7E03A8
#BFBFBF
#0D0887
#0D0887
#F89441
#7E03A8
#0D0887
#7E03A8
#7E03A8
#BFBFBF
#7E03A8
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#F0F921
#BFBFBF
#F0F921
#7E03A8
#0D0887
#0D0887
#CC4678
#CC4678
#CC4678
#0D0887
#7E03A8
#0D0887
#7E03A8
#0D0887
#CC4678
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#7E03A8
#7E03A8
#0D0887
#0D0887
#CC4678
#7E03A8
#CC4678
#CC4678
#0D0887
#7E03A8
#7E03A8
#CC4678
#F89441
#BFBFBF
#BFBFBF
#0D0887
#CC4678
#CC4678
#0D0887
#7E03A8
#0D0887
#0D0887
#7E03A8
#0D0887
#BFBFBF
#0D0887
#7E03A8
#0D0887
#7E03A8
#CC4678
#0D0887
#0D0887
#CC4678
#CC4678
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#F89441
#BFBFBF
#0D0887
#0D0887
#7E03A8
#0D0887
#BFBFBF
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#CC4678
#7E03A8
#7E03A8
#0D0887
#BFBFBF
#0D0887
#7E03A8
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#7E03A8
#0D0887
#7E03A8
#BFBFBF
#0D0887
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#CC4678
#0D0887
#7E03A8
#0D0887
#0D0887
#7E03A8
#F89441
#7E03A8
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#BFBFBF
#CC4678
#BFBFBF
#0D0887
#0D0887
#0D0887
#F89441
#F0F921
#CC4678
#BFBFBF
#0D0887
#7E03A8
#BFBFBF
#0D0887
#0D0887
#0D0887
#F0F921
#0D0887
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#7E03A8
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#0D0887
#BFBFBF
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#F89441
#0D0887
#0D0887
#CC4678
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#CC4678
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#F89441
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#7E03A8
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#BFBFBF
#0D0887
#0D0887
#7E03A8
#0D0887
#BFBFBF
#0D0887
#CC4678
#7E03A8
#7E03A8
#F0F921
#F89441
#7E03A8
#0D0887
#BFBFBF
#F89441 1 to 312
312 to 847
847 to 1,732
1,732 to 3,075
3,075 to 3,985
Missing #0D0887
#7E03A8
#CC4678
#F89441
#F0F921
#BFBFBF #666666 n n name to Less
than or
more metric png Geographical overview. Missing 37,345 poems. #FFFFFF YlOrBr RdYlGn Set3 black #000000 plain #000000 #FFFFFF right vertical left
bottom #000000 plain #000000 plain to Less
than or
more left
top #000000 plain #000000 plain #CCCCCC #000000 plain left bottom right
bottom left
bottom Esri.WorldGrayCanvas
OpenStreetMap
Esri.WorldTopoMap topright left
top grey85
grey40
grey60
black
black
black
grey75
grey95 Missing dummy
. km none km km
unprojected_locations %>%
arrange(desc(n)) %>%
gt() %>%
tab_header(\Poem locations not mapped\) %>%
fmt_integer(n)
d <- p_loc %>%
left_join(poems) %>%
count(collection,loc_id) %>%
ungroup() %>%
inner_join(locations) %>%
select(collection,name,n) %>%
collect()
poems_without_location <- poems %>%
anti_join(p_loc) %>%
count(collection) %>%
collect() %>%
mutate(name=NA_character_)
unprojected_locations <- d %>%
anti_join(polygons) %>%
union_all(poems_without_location)
poems %>%
distinct(collection) %>%
pull() %>%
map(~
tm_shape(
polygons %>%
left_join(
p_loc %>%
inner_join(poems %>% filter(collection==.x),by=c(\p_id\)) %>%
count(loc_id) %>%
inner_join(locations) %>%
select(name,n) %>%
collect()
)
) +
tm_polygons(col='n', id='name', style='fisher', palette='plasma') +
tm_layout(title=str_c(\Geography of \,.x,\. Missing \,unprojected_locations %>% filter(collection==.x) %>% tally(wt=n) %>% pull() %>% p,\ poems.\))
)
[[1]]
[[2]]
[[3]]
[[4]]
poems %>%
distinct(collection) %>%
pull() %>%
map(~
unprojected_locations %>%
filter(collection==.x) %>%
arrange(desc(n)) %>%
select(-collection) %>%
gt() %>%
tab_header(str_c(\Poem locations not mapped in \,.x)) %>%
fmt_integer(n)
)
raw_meta %>%
filter(field=="INF") %>%
mutate(value_c=str_replace_all(value,"^\\s*[A-Za-zÅÄÖåäö][a-zåäö][a-zåäö]+\\.","")) %>%
mutate(name=str_replace_all(value_c,"\\s*\"*([A-Za-zÅÄÖåäö]?[a-zåäö]?[a-zåäö]?\\.?[^;.,]+)(.|\\n)*","\\1")) %>%
group_by(name) %>%
summarise(origs=str_flatten(sql("distinct value"),collapse="|"),n=n(),.groups="drop") %>%
collect()
poems %>%
filter(collection!="literary") %>%
left_join(poem_theme %>% filter(is_minor==0) %>% inner_join(themes %>% mutate(theme_type=if_else(str_detect(theme_id,"^erab_orig"),"Non-unified","Unified")))) %>%
group_by(collection,p_id) %>%
summarise(theme_type=case_when(
any(theme_type=="Unified") ~ "Systematisoituja",
any(theme_type=="Non-unified") ~ "Vain ei-systematisoituja",
T ~ "Ei annotointeja"), .groups="drop") %>%
count(collection,theme_type) %>%
collect() %>%
mutate(collection=fct_rev(fct_relevel(str_to_upper(collection),"ERAB","SKVR","JR")),theme_type=fct_rev(fct_relevel(theme_type,"Systematisoituja","Vain ei-systematisoituja","Ei annotointeja"))) %>%
ggplot(aes(x=collection,y=n,fill=theme_type)) +
geom_col() +
theme_hsci_discrete() +
xlab("Kokoelma") +
ylab("Runoja") +
labs(fill="Runotyyppiannotaatiot") +
theme(legend.position="bottom") +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels=scales::number) +
coord_flip()
d <- poems %>%
left_join(p_year %>% mutate(year=if_else(year %in% c(0L,9999L),NA,year))) %>%
collect() %>%
mutate(year_ntile=ntile(year,11)) %>%
group_by(year_ntile) %>%
mutate(years=str_c(min(year),\-\,max(year))) %>%
ungroup() %>%
left_join(p_loc %>% collect()) %>%
count(years,loc_id) %>%
ungroup() %>%
left_join(locations %>% select(loc_id,name) %>% collect())
polygons %>%
left_join(d %>% complete(name,years)) %>%
tm_shape() +
tm_polygons(col='n', id='name', style='fisher', palette='plasma') +
tm_layout(main.title=\Geographical overviews by time\,legend.outside.size=0.1) +
tm_facets(by=\years\,ncol=4)
poem_stats %>%
filter(nverses<=75) %>%
inner_join(poems) %>%
count(collection,nverses) %>%
ungroup() %>%
ggplot(aes(x=nverses,y=n)) +
geom_col(width=1) +
facet_wrap(~collection,scales="free_y") +
theme_hsci_discrete(base_family="Arial") +
scale_y_continuous(labels=scales::comma_format()) +
xlab("Number of characters") +
ylab("Verses") +
labs(title="Number of verse lines")
poem_stats %>%
inner_join(poems) %>%
count(collection,nverses) %>%
ungroup() %>%
group_by(collection) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nverses<=75) %>%
ggplot(aes(x=nverses,y=collection,fill=collection,height=prop)) +
geom_density_ridges(stat='identity') +
theme_hsci_discrete(base_family="Arial") +
# scale_y_continuous(labels=scales::percent_format()) +
xlab("Number of verse lines") +
ylab("Poems") +
labs(title="Number of verse lines")
poem_stats %>%
inner_join(poems) %>%
count(collection,nverses) %>%
mutate(nl=if_else(nverses>75,n,0L)) %>%
group_by(collection) %>%
summarise(lines=sum(nl),proportion=sum(nl)/sum(n),.groups="drop") %>%
arrange(desc(lines)) %>%
gt() %>%
tab_header(title="Poems with more than 75 verses") %>%
fmt_integer(lines) %>%
fmt_percent(proportion)
| Poems with more than 75 verses | ||
| collection | lines | proportion |
|---|---|---|
| skvr | 2,370 | 2.66% |
| erab | 1,899 | 1.90% |
| jr | 1,288 | 1.51% |
| literary | 114 | 15.24% |
poem_stats %>%
left_join(p_loc) %>%
left_join(locations) %>%
left_join(locations,by=c("par_id"="loc_id")) %>%
mutate(name=if_else(type.x=="county",name.x,name.y)) %>%
count(name,nverses) %>%
ungroup() %>%
group_by(name) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nverses<=40,name!="Ahvenanmaa") %>%
collect() %>%
mutate(name=fct_reorder(name,prop,.fun=max)) %>%
ggplot(aes(x=nverses,y=name,height=prop)) +
geom_density_ridges(stat='identity') +
theme_hsci_continuous(base_family="Arial") +
# scale_y_continuous(labels=scales::percent_format()) +
xlab("Number of verse lines") +
ylab("Poems") +
guides(fill="none") +
labs(title="Number of verse lines by county")
d <- verses %>%
left_join(verse_poem) %>%
left_join(poems) %>%
count(collection,type) %>%
ungroup() %>%
arrange(collection,desc(n)) %>%
collect()
d %>%
group_by(collection) %>%
mutate(proportion=n/sum(n)) %>%
gt() %>%
fmt_integer(n) %>%
fmt_percent(proportion)
| type | n | proportion |
|---|---|---|
| skvr | ||
| V | 1,340,987 | 94.63% |
| L | 44,303 | 3.13% |
| CPT | 27,869 | 1.97% |
| K | 3,931 | 0.28% |
| erab | ||
| V | 1,861,583 | 93.39% |
| PAG | 53,040 | 2.66% |
| CPT | 19,844 | 1.00% |
| L | 18,465 | 0.93% |
| TYH | 18,357 | 0.92% |
| REF | 17,869 | 0.90% |
| LRY | 3,868 | 0.19% |
| RRE | 307 | 0.02% |
| MRK | 52 | 0.00% |
| U | 38 | 0.00% |
| LLI | 2 | 0.00% |
| TYP | 1 | 0.00% |
| jr | ||
| V | 812,343 | 90.94% |
| L | 49,411 | 5.53% |
| CPT | 28,030 | 3.14% |
| K | 3,502 | 0.39% |
| literary | ||
| V | 82,460 | 97.54% |
| L | 1,220 | 1.44% |
| CPT | 777 | 0.92% |
| K | 87 | 0.10% |
d_nr_characters <- verses_cl %>%
mutate(nr_characters=str_length(text)) %>%
left_join(verse_poem) %>%
left_join(poems) %>%
count(collection,nr_characters) %>%
ungroup() %>%
arrange(collection,desc(n)) %>%
collect()
d_nr_words <- word_occ %>%
group_by(v_id) %>%
summarise(nr_words=max(pos),.groups="drop") %>%
left_join(verse_poem) %>%
left_join(poems) %>%
count(collection,nr_words) %>%
ungroup() %>%
arrange(collection,desc(n)) %>%
collect()
d_nr_characters %>%
filter(nr_characters<=60) %>%
ggplot(aes(x=nr_characters,y=n)) +
geom_col(width=1) +
facet_wrap(~collection,scales=\free_y\) +
theme_hsci_discrete(base_family=\Arial\) +
scale_y_continuous(labels=scales::comma_format()) +
xlab(\Number of characters\) +
ylab(\Verses\) +
labs(title=\Number of characters in verse lines\)
d_nr_characters %>%
group_by(collection) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nr_characters<=60) %>%
ggplot(aes(x=nr_characters,y=collection,fill=collection,height=prop)) +
geom_density_ridges(stat='identity') +
theme_hsci_discrete(base_family=\Arial\) +
# scale_y_continuous(labels=scales::percent_format()) +
xlab(\Number of characters\) +
ylab(\Verses\) +
labs(title=\Number of characters in verse lines\)
d_nr_characters %>%
mutate(nl=if_else(nr_characters>60,n,0L)) %>%
group_by(collection) %>%
summarise(lines=sum(nl),proportion=sum(nl)/sum(n),.groups=\drop\) %>%
arrange(desc(lines)) %>%
gt() %>%
tab_header(title=\Verse lines with more than 60 characters\) %>%
fmt_integer(lines) %>%
fmt_percent(proportion)
d_nr_words %>%
filter(nr_words<=10) %>%
ggplot(aes(x=nr_words,y=n)) +
geom_col(width=1) +
facet_wrap(~collection,scales=\free_y\) +
scale_x_continuous(breaks=seq(0,10,by=2)) +
scale_y_continuous(labels=scales::comma_format()) +
theme_hsci_discrete(base_family=\Arial\) +
xlab(\Number of words\) +
ylab(\Verses\) +
labs(title=\Number of words in verse lines\)
d_nr_words %>%
filter(nr_words<=10) %>%
uncount(n) %>%
ggplot(aes(x=nr_words,y=collection,fill=collection)) +
stat_binline(binwidth=1) +
theme_hsci_discrete(base_family=\Arial\) +
scale_x_continuous(breaks=seq(0,10,by=2)) +
xlab(\Number of words\) +
ylab(\Verses\) +
# scale_y_continuous(labels=scales::percent_format()) +
labs(title=\Number of words in verse lines\)
d_nr_words %>%
mutate(nl=if_else(nr_words>10,n,0L)) %>%
group_by(collection) %>%
summarise(lines=sum(nl),proportion=sum(nl)/sum(n),.groups=\drop\) %>%
arrange(desc(lines)) %>%
gt() %>%
tab_header(title=\Verse lines with more than 10 words\) %>%
fmt_integer(lines) %>%
fmt_percent(proportion)
verse_nr_words <- word_occ %>%
group_by(v_id) %>%
summarise(nr_words=max(pos)) %>%
compute_a(unique_indexes=list(c(\v_id\,\nr_words\)))
word_nr_characters <- words %>%
mutate(nr_characters=str_length(text)) %>%
select(w_id,nr_characters) %>%
compute_a(unique_indexes=list(c(\w_id\,\nr_characters\)))
d <- word_occ %>%
left_join(word_nr_characters) %>%
left_join(verse_nr_words) %>%
left_join(verse_poem %>% select(-pos),by=c(\v_id\)) %>%
left_join(poems) %>%
count(collection,nr_words,pos,nr_characters) %>%
collect()
d_nr_characters <- verses_cl %>%
mutate(nr_characters=str_length(text)) %>%
left_join(verse_poem) %>%
left_join(p_loc) %>%
left_join(locations) %>%
left_join(locations,by=c("par_id"="loc_id")) %>%
mutate(name=if_else(type.x=="county",name.x,name.y)) %>%
count(name,nr_characters) %>%
ungroup() %>%
arrange(name,desc(n)) %>%
collect()
d_nr_words <- word_occ %>%
group_by(v_id) %>%
summarise(nr_words=max(pos),.groups="drop") %>%
left_join(verse_poem) %>%
left_join(p_loc) %>%
left_join(locations) %>%
left_join(locations,by=c("par_id"="loc_id")) %>%
mutate(name=if_else(type.x=="county",name.x,name.y)) %>%
count(name,nr_words) %>%
ungroup() %>%
arrange(name,desc(n)) %>%
collect()
d_nr_characters %>%
group_by(name) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nr_characters<=40,name!="Ahvenanmaa") %>%
mutate(name=fct_reorder(name,prop,.fun=max)) %>%
ggplot(aes(x=nr_characters,y=name,height=prop)) +
geom_density_ridges(stat='identity') +
theme_hsci_discrete(base_family="Arial") +
# scale_y_continuous(labels=scales::percent_format()) +
xlab("Number of characters") +
ylab("Verses") +
labs(title="Number of characters in verse lines")
d_nr_words %>%
filter(nr_words<8,name!="Ahvenanmaa") %>%
mutate(name=fct_reorder(name,n,.fun=max)) %>%
uncount(n) %>%
ggplot(aes(x=nr_words,y=name)) +
stat_binline(binwidth=1,scale=0.9) +
theme_hsci_discrete(base_family="Arial") +
scale_x_continuous(breaks=seq(0,10,by=2)) +
xlab("Number of words") +
ylab("Verses") +
# scale_y_continuous(labels=scales::percent_format()) +
labs(title="Number of words in verse lines")
verse_nr_words <- word_occ %>%
group_by(v_id) %>%
summarise(nr_words=max(pos)) %>%
compute_a(unique_indexes=list(c("v_id","nr_words")))
word_nr_characters <- words %>%
mutate(nr_characters=str_length(text)) %>%
select(w_id,nr_characters) %>%
compute_a(unique_indexes=list(c("w_id","nr_characters")))
d <- word_occ %>%
left_join(word_nr_characters) %>%
left_join(verse_nr_words) %>%
left_join(verse_poem %>% select(-pos),by=c("v_id")) %>%
left_join(poems) %>%
count(collection,nr_words,pos,nr_characters) %>%
collect()
d %>%
group_by(collection,nr_words,pos) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nr_words>=2L,nr_words<=5L) %>%
mutate(nr_words=as_factor(nr_words),pos=as_factor(pos)) %>%
uncount(n) %>%
ggplot(aes(x=nr_characters,y=nr_words,fill=pos)) +
stat_binline(binwidth=1) +
facet_grid(collection~pos,labeller = labeller(pos=label_both)) +
xlab(\Number of characters in word\) +
ylab(\Number of words in verse\) +
labs(
title=\Number of characters in words by their position\,
subtitle=\According to length of verse and collection\
) +
guides(fill=\none\) +
theme_hsci_discrete(base_family=\Arial\)
d %>%
group_by(collection,nr_words,pos) %>%
mutate(prop=n/sum(n)) %>%
ungroup() %>%
filter(nr_words>=2L,nr_words<=5L) %>%
mutate(nr_words=as_factor(nr_words),pos=as_factor(pos)) %>%
uncount(n) %>%
ggplot(aes(x=nr_characters,y=pos,fill=nr_words)) +
stat_binline(binwidth=1) +
facet_grid(collection~nr_words,labeller = labeller(nr_words=label_both)) +
xlab(\Number of characters in word\) +
ylab(\Position\) +
labs(
title=\Number of characters in words by their position\,
subtitle=\According to length of verse and collection\
) +
guides(fill=\none\) +
theme_hsci_discrete(base_family=\Arial\)